{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 作业1.1\n",
    "### 根据所给的训练数据集，利用信息增益和信息增益比分别生成决策树"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = pd.read_csv('homework_1.1_dataset.txt', sep='\\t')\n",
    "dataset = dataset.T\n",
    "dataset = dataset[1:].T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>outlook</th>\n",
       "      <th>temp</th>\n",
       "      <th>humid</th>\n",
       "      <th>wind</th>\n",
       "      <th>tennis</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>sun</td>\n",
       "      <td>hot</td>\n",
       "      <td>high</td>\n",
       "      <td>weak</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sun</td>\n",
       "      <td>hot</td>\n",
       "      <td>high</td>\n",
       "      <td>strong</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>cloud</td>\n",
       "      <td>hot</td>\n",
       "      <td>high</td>\n",
       "      <td>weak</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>rain</td>\n",
       "      <td>mild</td>\n",
       "      <td>high</td>\n",
       "      <td>weak</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>rain</td>\n",
       "      <td>cool</td>\n",
       "      <td>norm</td>\n",
       "      <td>weak</td>\n",
       "      <td>yes</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  outlook  temp humid    wind tennis\n",
       "0     sun   hot  high    weak     no\n",
       "1     sun   hot  high  strong     no\n",
       "2   cloud   hot  high    weak    yes\n",
       "3    rain  mild  high    weak    yes\n",
       "4    rain  cool  norm    weak    yes"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对数据量化处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "outlook_mapping = {label: idx+1 for idx, label in enumerate(np.unique(dataset['outlook']))}\n",
    "temp_mapping = {label: idx+1 for idx, label in enumerate(np.unique(dataset['temp']))}\n",
    "humid_mapping = {label: idx+1 for idx, label in enumerate(np.unique(dataset['humid']))}\n",
    "wind_mapping = {label: idx+1 for idx, label in enumerate(np.unique(dataset['wind']))}\n",
    "tennis_mapping = {label: idx for idx, label in enumerate(np.unique(dataset['tennis']))}\n",
    "dataset['outlook'] = dataset['outlook'].map(outlook_mapping)\n",
    "dataset['temp'] = dataset['temp'].map(temp_mapping)\n",
    "dataset['humid'] = dataset['humid'].map(humid_mapping)\n",
    "dataset['wind'] = dataset['wind'].map(wind_mapping)\n",
    "dataset['tennis'] = dataset['tennis'].map(tennis_mapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>outlook</th>\n",
       "      <th>temp</th>\n",
       "      <th>humid</th>\n",
       "      <th>wind</th>\n",
       "      <th>tennis</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   outlook  temp  humid  wind  tennis\n",
       "0        3     2      1     2       0\n",
       "1        3     2      1     1       0\n",
       "2        1     2      1     2       1\n",
       "3        2     3      1     2       1\n",
       "4        2     1      2     2       1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对应的colume量化字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'cloud': 1,\n",
       " 'rain': 2,\n",
       " 'sun': 3,\n",
       " 'cool': 1,\n",
       " 'hot': 2,\n",
       " 'mild': 3,\n",
       " 'high': 1,\n",
       " 'norm': 2,\n",
       " 'strong': 1,\n",
       " 'weak': 2,\n",
       " 'no': 0,\n",
       " 'yes': 1}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_mapping = {**outlook_mapping, **temp_mapping, **humid_mapping, **wind_mapping, **tennis_mapping}\n",
    "data_mapping"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 得到训练数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = np.array(dataset)\n",
    "X_train, y_train = data[:, :-1] , data[:, -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.tree import export_graphviz\n",
    "import graphviz\n",
    "import pydotplus\n",
    "\n",
    "import os\n",
    "os.environ[\"PATH\"] += os.pathsep + 'E:/DevSoft/Graphviz2.38/bin'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 采用信息增益，即C4.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,\n",
       "            max_features=None, max_leaf_nodes=None,\n",
       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "            min_samples_leaf=1, min_samples_split=2,\n",
       "            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
       "            splitter='best')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_tree = DecisionTreeClassifier(criterion='entropy')\n",
    "model_tree.fit(X_train, y_train,)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 可视化决策树"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
       "<!-- Generated by graphviz version 2.38.0 (20140413.2041)\r\n",
       " -->\r\n",
       "<!-- Title: Tree Pages: 1 -->\r\n",
       "<svg width=\"529pt\" height=\"552pt\"\r\n",
       " viewBox=\"0.00 0.00 529.00 552.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 548)\">\r\n",
       "<title>Tree</title>\r\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-548 525,-548 525,4 -4,4\"/>\r\n",
       "<!-- 0 -->\r\n",
       "<g id=\"node1\" class=\"node\"><title>0</title>\r\n",
       "<path fill=\"#399de5\" fill-opacity=\"0.443137\" stroke=\"black\" d=\"M269.5,-544C269.5,-544 183.5,-544 183.5,-544 177.5,-544 171.5,-538 171.5,-532 171.5,-532 171.5,-473 171.5,-473 171.5,-467 177.5,-461 183.5,-461 183.5,-461 269.5,-461 269.5,-461 275.5,-461 281.5,-467 281.5,-473 281.5,-473 281.5,-532 281.5,-532 281.5,-538 275.5,-544 269.5,-544\"/>\r\n",
       "<text text-anchor=\"start\" x=\"183\" y=\"-528.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">outlook ≤ 1.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"179.5\" y=\"-513.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.94</text>\r\n",
       "<text text-anchor=\"start\" x=\"182.5\" y=\"-498.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 14</text>\r\n",
       "<text text-anchor=\"start\" x=\"185\" y=\"-483.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [5, 9]</text>\r\n",
       "<text text-anchor=\"start\" x=\"197.5\" y=\"-468.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 1</text>\r\n",
       "</g>\r\n",
       "<!-- 1 -->\r\n",
       "<g id=\"node2\" class=\"node\"><title>1</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M205,-417.5C205,-417.5 128,-417.5 128,-417.5 122,-417.5 116,-411.5 116,-405.5 116,-405.5 116,-361.5 116,-361.5 116,-355.5 122,-349.5 128,-349.5 128,-349.5 205,-349.5 205,-349.5 211,-349.5 217,-355.5 217,-361.5 217,-361.5 217,-405.5 217,-405.5 217,-411.5 211,-417.5 205,-417.5\"/>\r\n",
       "<text text-anchor=\"start\" x=\"124\" y=\"-402.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"127\" y=\"-387.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 4</text>\r\n",
       "<text text-anchor=\"start\" x=\"125\" y=\"-372.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 4]</text>\r\n",
       "<text text-anchor=\"start\" x=\"137.5\" y=\"-357.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 1</text>\r\n",
       "</g>\r\n",
       "<!-- 0&#45;&gt;1 -->\r\n",
       "<g id=\"edge1\" class=\"edge\"><title>0&#45;&gt;1</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M205.683,-460.907C200.025,-449.873 193.883,-437.898 188.178,-426.773\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"191.186,-424.968 183.509,-417.667 184.958,-428.162 191.186,-424.968\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"175.845\" y=\"-437.763\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\r\n",
       "</g>\r\n",
       "<!-- 2 -->\r\n",
       "<g id=\"node3\" class=\"node\"><title>2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M327.5,-425C327.5,-425 247.5,-425 247.5,-425 241.5,-425 235.5,-419 235.5,-413 235.5,-413 235.5,-354 235.5,-354 235.5,-348 241.5,-342 247.5,-342 247.5,-342 327.5,-342 327.5,-342 333.5,-342 339.5,-348 339.5,-354 339.5,-354 339.5,-413 339.5,-413 339.5,-419 333.5,-425 327.5,-425\"/>\r\n",
       "<text text-anchor=\"start\" x=\"248\" y=\"-409.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">humid ≤ 1.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"245\" y=\"-394.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 1.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"243.5\" y=\"-379.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 10</text>\r\n",
       "<text text-anchor=\"start\" x=\"246\" y=\"-364.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [5, 5]</text>\r\n",
       "<text text-anchor=\"start\" x=\"258.5\" y=\"-349.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 0</text>\r\n",
       "</g>\r\n",
       "<!-- 0&#45;&gt;2 -->\r\n",
       "<g id=\"edge2\" class=\"edge\"><title>0&#45;&gt;2</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M247.664,-460.907C252.206,-452.195 257.054,-442.897 261.748,-433.893\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"264.854,-435.506 266.374,-425.021 258.647,-432.27 264.854,-435.506\"/>\r\n",
       "<text text-anchor=\"middle\" x=\"273.876\" y=\"-445.168\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\r\n",
       "</g>\r\n",
       "<!-- 3 -->\r\n",
       "<g id=\"node4\" class=\"node\"><title>3</title>\r\n",
       "<path fill=\"#e58139\" fill-opacity=\"0.749020\" stroke=\"black\" d=\"M266.5,-306C266.5,-306 172.5,-306 172.5,-306 166.5,-306 160.5,-300 160.5,-294 160.5,-294 160.5,-235 160.5,-235 160.5,-229 166.5,-223 172.5,-223 172.5,-223 266.5,-223 266.5,-223 272.5,-223 278.5,-229 278.5,-235 278.5,-235 278.5,-294 278.5,-294 278.5,-300 272.5,-306 266.5,-306\"/>\r\n",
       "<text text-anchor=\"start\" x=\"176\" y=\"-290.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">outlook ≤ 2.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"168.5\" y=\"-275.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.722</text>\r\n",
       "<text text-anchor=\"start\" x=\"180\" y=\"-260.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 5</text>\r\n",
       "<text text-anchor=\"start\" x=\"178\" y=\"-245.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [4, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"190.5\" y=\"-230.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 0</text>\r\n",
       "</g>\r\n",
       "<!-- 2&#45;&gt;3 -->\r\n",
       "<g id=\"edge3\" class=\"edge\"><title>2&#45;&gt;3</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M263.908,-341.907C258.844,-333.195 253.44,-323.897 248.207,-314.893\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"251.102,-312.908 243.051,-306.021 245.049,-316.425 251.102,-312.908\"/>\r\n",
       "</g>\r\n",
       "<!-- 8 -->\r\n",
       "<g id=\"node9\" class=\"node\"><title>8</title>\r\n",
       "<path fill=\"#399de5\" fill-opacity=\"0.749020\" stroke=\"black\" d=\"M402.5,-306C402.5,-306 308.5,-306 308.5,-306 302.5,-306 296.5,-300 296.5,-294 296.5,-294 296.5,-235 296.5,-235 296.5,-229 302.5,-223 308.5,-223 308.5,-223 402.5,-223 402.5,-223 408.5,-223 414.5,-229 414.5,-235 414.5,-235 414.5,-294 414.5,-294 414.5,-300 408.5,-306 402.5,-306\"/>\r\n",
       "<text text-anchor=\"start\" x=\"320.5\" y=\"-290.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">wind ≤ 1.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"304.5\" y=\"-275.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.722</text>\r\n",
       "<text text-anchor=\"start\" x=\"316\" y=\"-260.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 5</text>\r\n",
       "<text text-anchor=\"start\" x=\"314\" y=\"-245.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 4]</text>\r\n",
       "<text text-anchor=\"start\" x=\"326.5\" y=\"-230.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 1</text>\r\n",
       "</g>\r\n",
       "<!-- 2&#45;&gt;8 -->\r\n",
       "<g id=\"edge8\" class=\"edge\"><title>2&#45;&gt;8</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M311.092,-341.907C316.156,-333.195 321.56,-323.897 326.793,-314.893\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"329.951,-316.425 331.949,-306.021 323.898,-312.908 329.951,-316.425\"/>\r\n",
       "</g>\r\n",
       "<!-- 4 -->\r\n",
       "<g id=\"node5\" class=\"node\"><title>4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M144,-187C144,-187 67,-187 67,-187 61,-187 55,-181 55,-175 55,-175 55,-116 55,-116 55,-110 61,-104 67,-104 67,-104 144,-104 144,-104 150,-104 156,-110 156,-116 156,-116 156,-175 156,-175 156,-181 150,-187 144,-187\"/>\r\n",
       "<text text-anchor=\"start\" x=\"70.5\" y=\"-171.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">wind ≤ 1.5</text>\r\n",
       "<text text-anchor=\"start\" x=\"63\" y=\"-156.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 1.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"66\" y=\"-141.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 2</text>\r\n",
       "<text text-anchor=\"start\" x=\"64\" y=\"-126.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"76.5\" y=\"-111.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 0</text>\r\n",
       "</g>\r\n",
       "<!-- 3&#45;&gt;4 -->\r\n",
       "<g id=\"edge4\" class=\"edge\"><title>3&#45;&gt;4</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M179.948,-222.907C170.929,-213.651 161.265,-203.732 151.986,-194.209\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"154.467,-191.741 144.982,-187.021 149.454,-196.626 154.467,-191.741\"/>\r\n",
       "</g>\r\n",
       "<!-- 7 -->\r\n",
       "<g id=\"node8\" class=\"node\"><title>7</title>\r\n",
       "<path fill=\"#e58139\" stroke=\"black\" d=\"M263,-179.5C263,-179.5 186,-179.5 186,-179.5 180,-179.5 174,-173.5 174,-167.5 174,-167.5 174,-123.5 174,-123.5 174,-117.5 180,-111.5 186,-111.5 186,-111.5 263,-111.5 263,-111.5 269,-111.5 275,-117.5 275,-123.5 275,-123.5 275,-167.5 275,-167.5 275,-173.5 269,-179.5 263,-179.5\"/>\r\n",
       "<text text-anchor=\"start\" x=\"182\" y=\"-164.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"185\" y=\"-149.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 3</text>\r\n",
       "<text text-anchor=\"start\" x=\"183\" y=\"-134.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [3, 0]</text>\r\n",
       "<text text-anchor=\"start\" x=\"195.5\" y=\"-119.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 0</text>\r\n",
       "</g>\r\n",
       "<!-- 3&#45;&gt;7 -->\r\n",
       "<g id=\"edge7\" class=\"edge\"><title>3&#45;&gt;7</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M221.235,-222.907C221.692,-212.204 222.187,-200.615 222.651,-189.776\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"226.152,-189.807 223.083,-179.667 219.159,-189.508 226.152,-189.807\"/>\r\n",
       "</g>\r\n",
       "<!-- 5 -->\r\n",
       "<g id=\"node6\" class=\"node\"><title>5</title>\r\n",
       "<path fill=\"#e58139\" stroke=\"black\" d=\"M89,-68C89,-68 12,-68 12,-68 6,-68 0,-62 0,-56 0,-56 0,-12 0,-12 0,-6 6,-0 12,-0 12,-0 89,-0 89,-0 95,-0 101,-6 101,-12 101,-12 101,-56 101,-56 101,-62 95,-68 89,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"8\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"11\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"9\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 0]</text>\r\n",
       "<text text-anchor=\"start\" x=\"21.5\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 0</text>\r\n",
       "</g>\r\n",
       "<!-- 4&#45;&gt;5 -->\r\n",
       "<g id=\"edge5\" class=\"edge\"><title>4&#45;&gt;5</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M85.02,-103.726C80.7137,-95.1527 76.1612,-86.0891 71.8247,-77.4555\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"74.842,-75.6647 67.2258,-68.2996 68.5867,-78.8067 74.842,-75.6647\"/>\r\n",
       "</g>\r\n",
       "<!-- 6 -->\r\n",
       "<g id=\"node7\" class=\"node\"><title>6</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M208,-68C208,-68 131,-68 131,-68 125,-68 119,-62 119,-56 119,-56 119,-12 119,-12 119,-6 125,-0 131,-0 131,-0 208,-0 208,-0 214,-0 220,-6 220,-12 220,-12 220,-56 220,-56 220,-62 214,-68 208,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"127\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"130\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"128\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"140.5\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 1</text>\r\n",
       "</g>\r\n",
       "<!-- 4&#45;&gt;6 -->\r\n",
       "<g id=\"edge6\" class=\"edge\"><title>4&#45;&gt;6</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M129.331,-103.726C134.396,-95.0615 139.752,-85.8962 144.847,-77.1802\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"148.013,-78.6992 150.037,-68.2996 141.969,-75.167 148.013,-78.6992\"/>\r\n",
       "</g>\r\n",
       "<!-- 9 -->\r\n",
       "<g id=\"node10\" class=\"node\"><title>9</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M390,-187C390,-187 313,-187 313,-187 307,-187 301,-181 301,-175 301,-175 301,-116 301,-116 301,-110 307,-104 313,-104 313,-104 390,-104 390,-104 396,-104 402,-110 402,-116 402,-116 402,-175 402,-175 402,-181 396,-187 390,-187\"/>\r\n",
       "<text text-anchor=\"start\" x=\"315.5\" y=\"-171.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">temp ≤ 2.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"309\" y=\"-156.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 1.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"312\" y=\"-141.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 2</text>\r\n",
       "<text text-anchor=\"start\" x=\"310\" y=\"-126.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"322.5\" y=\"-111.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 0</text>\r\n",
       "</g>\r\n",
       "<!-- 8&#45;&gt;9 -->\r\n",
       "<g id=\"edge9\" class=\"edge\"><title>8&#45;&gt;9</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M354.112,-222.907C353.827,-214.558 353.523,-205.671 353.227,-197.02\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"356.725,-196.895 352.885,-187.021 349.729,-197.135 356.725,-196.895\"/>\r\n",
       "</g>\r\n",
       "<!-- 12 -->\r\n",
       "<g id=\"node13\" class=\"node\"><title>12</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M509,-179.5C509,-179.5 432,-179.5 432,-179.5 426,-179.5 420,-173.5 420,-167.5 420,-167.5 420,-123.5 420,-123.5 420,-117.5 426,-111.5 432,-111.5 432,-111.5 509,-111.5 509,-111.5 515,-111.5 521,-117.5 521,-123.5 521,-123.5 521,-167.5 521,-167.5 521,-173.5 515,-179.5 509,-179.5\"/>\r\n",
       "<text text-anchor=\"start\" x=\"428\" y=\"-164.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"431\" y=\"-149.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 3</text>\r\n",
       "<text text-anchor=\"start\" x=\"429\" y=\"-134.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 3]</text>\r\n",
       "<text text-anchor=\"start\" x=\"441.5\" y=\"-119.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 1</text>\r\n",
       "</g>\r\n",
       "<!-- 8&#45;&gt;12 -->\r\n",
       "<g id=\"edge12\" class=\"edge\"><title>8&#45;&gt;12</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M395.399,-222.907C406.787,-211.321 419.194,-198.698 430.584,-187.111\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"433.386,-189.252 437.9,-179.667 428.394,-184.345 433.386,-189.252\"/>\r\n",
       "</g>\r\n",
       "<!-- 10 -->\r\n",
       "<g id=\"node11\" class=\"node\"><title>10</title>\r\n",
       "<path fill=\"#e58139\" stroke=\"black\" d=\"M358,-68C358,-68 281,-68 281,-68 275,-68 269,-62 269,-56 269,-56 269,-12 269,-12 269,-6 275,-0 281,-0 281,-0 358,-0 358,-0 364,-0 370,-6 370,-12 370,-12 370,-56 370,-56 370,-62 364,-68 358,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"277\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"280\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"278\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [1, 0]</text>\r\n",
       "<text text-anchor=\"start\" x=\"290.5\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 0</text>\r\n",
       "</g>\r\n",
       "<!-- 9&#45;&gt;10 -->\r\n",
       "<g id=\"edge10\" class=\"edge\"><title>9&#45;&gt;10</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M339.584,-103.726C337.132,-95.3351 334.543,-86.4745 332.068,-78.0072\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"335.396,-76.9163 329.231,-68.2996 328.677,-78.8799 335.396,-76.9163\"/>\r\n",
       "</g>\r\n",
       "<!-- 11 -->\r\n",
       "<g id=\"node12\" class=\"node\"><title>11</title>\r\n",
       "<path fill=\"#399de5\" stroke=\"black\" d=\"M477,-68C477,-68 400,-68 400,-68 394,-68 388,-62 388,-56 388,-56 388,-12 388,-12 388,-6 394,-0 400,-0 400,-0 477,-0 477,-0 483,-0 489,-6 489,-12 489,-12 489,-56 489,-56 489,-62 483,-68 477,-68\"/>\r\n",
       "<text text-anchor=\"start\" x=\"396\" y=\"-52.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">entropy = 0.0</text>\r\n",
       "<text text-anchor=\"start\" x=\"399\" y=\"-37.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1</text>\r\n",
       "<text text-anchor=\"start\" x=\"397\" y=\"-22.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0, 1]</text>\r\n",
       "<text text-anchor=\"start\" x=\"409.5\" y=\"-7.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">class = 1</text>\r\n",
       "</g>\r\n",
       "<!-- 9&#45;&gt;11 -->\r\n",
       "<g id=\"edge11\" class=\"edge\"><title>9&#45;&gt;11</title>\r\n",
       "<path fill=\"none\" stroke=\"black\" d=\"M383.896,-103.726C390.997,-94.7878 398.522,-85.3168 405.642,-76.3558\"/>\r\n",
       "<polygon fill=\"black\" stroke=\"black\" points=\"408.562,-78.3065 412.043,-68.2996 403.082,-73.9519 408.562,-78.3065\"/>\r\n",
       "</g>\r\n",
       "</g>\r\n",
       "</svg>\r\n"
      ],
      "text/plain": [
       "<graphviz.files.Source at 0x167f86d8>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_feature_name = dataset.columns[:-1]\n",
    "data_target_name = [str(x) for x in np.unique(dataset[dataset.columns[-1]])]\n",
    "dot_tree = export_graphviz(model_tree, out_file='mytree.pdf' ,feature_names=data_feature_name,\n",
    "                           class_names=data_target_name,filled=True, rounded=True,special_characters=True)\n",
    "\n",
    "with open('mytree.pdf') as f:\n",
    "    dot_graph = f.read()\n",
    "graphviz.Source(dot_graph)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
